clear all
use "datasets/MRA_waste_sites"

*Regression diagnostics based on WLS-RE PEESE

quietly metareg elas elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial, wsse(elas_SE) tau2test

scalar tau2 = e(tau2)
display tau2
gen elas_var_2= elas_var + tau2
gen elas_SE_2 = sqrt(elas_var_2)
gen precision_2= 1/(elas_SE_2)
gen precision_sq_2= 1/elas_var_2

foreach var in elas elas_SE elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial{
gen `var'2 = `var'/elas_SE_2
}

*manually deleted cleanup_not needed (due to manual weighting) / non-robust version to perform the tests
quietly reg elas2 precision_2 elas_var2 publish2 year_publish_c2 site_m2 NPL_n2 NPL_y2 NPL_u2 active_n2 active_u2 job_n2 job_u2 non_haz2 nuclear2 air2 water2 element_u2 North_America2 Asia2 Other_continent2 cleanup02 cleanup22 cleanup32 cleanup_unclear2 dist_greater_mean2 sample_c2 sale_ind2 num_expl_c2 oth_disamen2 oth_amen2 access2 industry2 demoecon2 time_control2 direction2 interaction2 log_log2 OLS_spatial2, nocons 

*------------------------------------------------------------------------
* Figure A5: Residual plots including outliers
*------------------------------------------------------------------------

*Some statistics cannot be computed due to "nocons" option, lvr2plot -->manually via predict + scatter, rvfplot --> manually via predict + scatter, hettest --> manually via predict

*Residual plots for unusual or influential data
predict fitted if e(sample), xb
predict residual if e(sample), resid
predict leverage if e(sample), leverage
gen res_sq=residual*residual
sum leverage res_sq
scatter leverage res_sq, graphregion(color(white)) ylab(0(0.2)0.8, nogrid) yline(0.0356164) xline(1.206613) xtitle(Normalized residual squared) // 177 647 649 842 maybe 176
graph rename Graph lvr2plot, replace
scatter residual fitted, yline(0) xtitle(Fitted values) graphregion(color(white)) ylab(-10(5)5, nogrid)  //176 177 647 648 649 842
graph rename Graph rvfplot, replace

*Normality of residuals 
kdensity residual if e(sample), normal graphregion(color(white)) xlab(-8(4)6, nogrid) ylab(0(0.1)0.5, nogrid) legend(position(6))
graph rename Graph kdensity, replace
qnorm residual if e(sample), graphregion(color(white)) // 176 177 647 649 842  
graph rename Graph qnorm, replace
graph combine lvr2plot rvfplot kdensity qnorm, graphregion(color(white))
graph export "figures/Figure A5. Residual plots including outliers.tif", replace as(tif)

*Heteroscedasticity 
*via Breusch-Pagan / Cook-Weisberg
gen residual_sq=residual^2/(e(rss)/e(N))
reg residual_sq fitted
display "Chi Square(1) = " e(mss)/2
display "Prob > chi2 = " chi2tail(1, e(mss)/2)
* via White
estat imtest, white
*homoscedasticity clearly rejected in both cases


*---------------------------------------------
* Figure A6: Residual plots excluding outliers
*---------------------------------------------
preserve
drop if ID_Uni==176
drop if ID_Uni==177
drop if ID_Uni==647
drop if ID_Uni==648
drop if ID_Uni==649
drop if ID_Uni==842

quietly metareg elas elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial, wsse(elas_SE) tau2test

scalar tau2 = e(tau2)
display tau2
gen elas_var_3= elas_var + tau2
gen elas_SE_3 = sqrt(elas_var_3)
gen precision_3= 1/(elas_SE_3)
gen precision_sq_3= 1/elas_var_3


foreach var in elas elas_SE elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial{
gen `var'3 = `var'/elas_SE_3
}

*manually deleted cleanup_not needed (due to manual weighting) / non-robust version to perfrom the tests
quietly reg elas3 precision_3 elas_var3 publish3 year_publish_c3 site_m3 NPL_n3 NPL_y3 NPL_u3 active_n3 active_u3 job_n3 job_u3 non_haz3 nuclear3 air3 water3 element_u3 North_America3 Asia3 Other_continent3 cleanup03 cleanup23 cleanup33 cleanup_unclear3 dist_greater_mean3 sample_c3 sale_ind3 num_expl_c3 oth_disamen3 oth_amen3 access3 industry3 demoecon3 time_control3 direction3 interaction3 log_log3 OLS_spatial3, nocons 

*Residual plots for unusual or influential data
predict fitted2 if e(sample), xb
predict residual2 if e(sample), resid
predict leverage2 if e(sample), leverage
gen res_sq2=residual2*residual2
sum leverage2 res_sq2
scatter leverage2 res_sq2, graphregion(color(white)) ylab(0(0.2)0.8, nogrid) yline(0.0358621) xline(1.325522) xtitle(Normalized residual squared)
graph rename Graph lvr2plot2, replace
scatter residual2 fitted2, yline(0) xtitle(Fitted values) graphregion(color(white))  ylab(-5(5)5, nogrid) 
graph rename Graph rvfplot2, replace

*Normality of residuals 
kdensity residual2 if e(sample), normal graphregion(color(white)) ylab(0(0.1)0.5, nogrid) legend(position(6)) 
graph rename Graph kdensity2, replace
qnorm residual2 if e(sample), graphregion(color(white))
graph rename Graph qnorm2, replace
graph combine lvr2plot2 rvfplot2 kdensity2 qnorm2, graphregion(color(white))
graph export "figures/Figure A6. Residual plots excluding outliers.tif", replace as(tif)

*Heteroscedasticity 
*via Breusch-Pagan / Cook-Weisberg
gen residual2_sq=residual2^2/(e(rss)/e(N))
reg residual2_sq fitted2
display "Chi Square(1) = " e(mss)/2
display "Prob > chi2 = " chi2tail(1, e(mss)/2)
* via White
estat imtest, white

restore
*----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* The residual plots show some improvement when outliers are removed. Additionally heteroscedastiy is no longer present. Comparing the resulting regressions reveals no noticeable changes.
*----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

*------------------
*Multicollinearity
*------------------
* WLS with manual weights automatically induces high multicollinearity with the weight. This is expected and should not be an area of concern. However, to avoid inflated VIF values the AWEIGHT alternative or even the simple OLS version can be used. As Menard (2002, p.76) in Applied Logistic Regression Analysis puts it: "Because the concern is with the relationship among the independent variables, the functional form of the model for the dependent variable is irrelevant to the estimation of collinearity." The VIF results for the OLS and AWEIGHT alternative are very similar as well as the results with and without the potential outliers.

*Aweight
quietly reg elas elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial [aw=precision_sq_2]
vif

*OLS
quietly reg elas elas_var publish year_publish_c site_m NPL_n NPL_y NPL_u active_n active_u job_n job_u non_haz nuclear air water element_u North_America Asia Other_continent cleanup0 cleanup2 cleanup3 cleanup_unclear dist_greater_mean sample_c sale_ind num_expl_c oth_disamen oth_amen access industry demoecon time_control direction interaction log_log OLS_spatial
vif



clear

